home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Cream of the Crop 25
/
Cream of the Crop 25.iso
/
os2
/
gnuwget.zip
/
wget-1.4.3
/
src
/
retr.c
< prev
next >
Wrap
C/C++ Source or Header
|
1997-02-09
|
14KB
|
554 lines
/* File retrieval.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <errno.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#include <ctype.h>
#include <assert.h>
#include "wget.h"
#include "options.h"
#include "utils.h"
#include "retr.h"
#include "url.h"
#include "recur.h"
#include "ftp.h"
#include "html.h"
#include "http.h"
#include "host.h"
#include "connect.h"
/* global variables */
extern struct options opt;
/* Buffered input variables, shared by buf_readchar(), buf_peek(),
buf_flush() and buf_discard(): */
char buffer[INPUT_BUFFER_SIZE]; /* The input buffer itself. */
char *buffer_pos; /* The current position in the buffer, i.e. the
next byte that has not been handed out yet. */
size_t buffer_left; /* Number of unread bytes left in the buffer,
starting at buffer_pos. It shrinks as buffer_pos advances and is 0
when the buffer is empty. */
/* Variables used for the timer: the seconds and milliseconds parts of
the time recorded by the last call to reset_timer(). */
long internal_secs, internal_msecs;
/* Fetch one character from file descriptor FD through the input
   buffer and store it in *RET.

   When unread bytes remain in the buffer, the next one is handed out.
   Otherwise the buffer is rewound and refilled with iread(), and the
   first freshly read byte is handed out.

   Returns 1 when a character was stored.  If the refill produces a
   non-positive result, that result of iread() is returned unchanged
   and *RET is not written.  */
int
buf_readchar(int fd, char *ret)
{
  if (!buffer_left)
    {
      int nread;

      /* Nothing buffered: start over at the beginning and refill.  */
      buffer_pos = buffer;
      buffer_left = 0;
      nread = iread(fd, buffer, INPUT_BUFFER_SIZE);
      if (nread <= 0)
        return nread;
      buffer_left = nread;
    }

  /* Consume exactly one byte from the buffer.  */
  --buffer_left;
  *ret = *buffer_pos++;
  return 1;
}
/* Look at the next character available from file descriptor FD
   without consuming it: the character is stored in *RET, but the
   buffer position and the count of unread bytes stay as they are.

   If the buffer is empty it is first refilled with iread().  Returns
   1 when a character was stored; if the refill produces a
   non-positive result, that result is returned unchanged and *RET is
   not written.  */
int
buf_peek(int fd, char *ret)
{
  if (!buffer_left)
    {
      int nread;

      /* Nothing buffered: start over at the beginning and refill.  */
      buffer_pos = buffer;
      buffer_left = 0;
      nread = iread(fd, buffer, INPUT_BUFFER_SIZE);
      if (nread <= 0)
        return nread;
      buffer_left = nread;
    }

  *ret = *buffer_pos;
  return 1;
}
/* Flush the input buffer.

   WHERE is the destination for the buffered bytes; it may be NULL, in
   which case the bytes are dropped without being copied.  MAXSIZE is
   the largest number of bytes that may be handed out in one call.

   Returns the number of bytes removed from the buffer.  0 is returned
   when the buffer is empty or when MAXSIZE is not positive.  */
int
buf_flush(char *where, int maxsize)
{
  size_t howmuch;

  /* A non-positive MAXSIZE must be rejected before it is compared
     with the unsigned buffer_left: a negative value would otherwise
     convert to a huge size_t and let the whole buffer be copied.  */
  if (!buffer_left || maxsize <= 0)
    return 0;

  howmuch = buffer_left <= (size_t)maxsize ? buffer_left : (size_t)maxsize;
  if (where)
    memcpy(where, buffer_pos, howmuch);
  buffer_left -= howmuch;
  buffer_pos += howmuch;
  return (int)howmuch;
}
/* Throw away whatever is still held in the input buffer: the position
   is rewound to the start of the buffer and the count of unread bytes
   is set to zero.  */
void
buf_discard(void)
{
  buffer_pos = buffer;
  buffer_left = 0;
}
/* Write the COUNT bytes at DATA to stream FP, update the progress
   display when opt.verbose is set, and add COUNT to *LEN.

   Returns 0 on success and -1 if the chunk could not be written in
   full.  A short write counts as a failure: fwrite() reports the
   number of items it actually wrote, and anything less than COUNT
   means data was lost.  */
static int
store_chunk(const char *data, int count, FILE *fp, long *len)
{
  if (fwrite(data, sizeof(char), count, fp) != (size_t)count)
    return -1;
  if (opt.verbose && show_progress(count, 0))
    fflush(fp);
  *len += count;
  return 0;
}

/* Reads the contents of file descriptor fd until iread() stops
   returning data.  The data is read in parts BUFFER_SIZE bytes long,
   and stored to stream fp, which should have been open for writing.

   If opt.verbose is set, the progress is also shown.  Variable
   restval is used to represent a value from which to start
   downloading (which will be shown accordingly).  If restval is set,
   the stream should have been open for appending.

   *len is initialized to restval and incremented by every chunk
   stored, so on return it holds restval plus the number of bytes
   written by this call.

   Return value: the last result of iread() when it is 0 or -1 (any
   result below -1 is folded to -1), or -2 if a chunk could not be
   written completely to the output stream.

   IMPORTANT: Unless nobuf is non-zero, the function first drains the
   input buffer through buf_flush() before actually reading from fd.
   If you wish to read from fd immediately, flush or discard the
   buffer.  */
int
get_contents(int fd, FILE *fp, long *len, long restval, int nobuf)
{
  int res;
  static char c[BUFFER_SIZE];

  *len = restval;

  /* Initialize show_progress. */
  if (opt.verbose)
    show_progress(restval, 1);

  /* Drain the input buffer first, unless told not to. */
  if (!nobuf)
    {
      while ((res = buf_flush(c, BUFFER_SIZE)) != 0)
        {
          if (store_chunk(c, res, fp, len) < 0)
            return -2;
        }
    }

  /* Read from fd while there is available data. */
  do
    {
      res = iread(fd, c, BUFFER_SIZE);
      if (res > 0 && store_chunk(c, res, fp, len) < 0)
        return -2;
    }
  while (res > 0);

  /* Callers only distinguish 0 and -1 here. */
  if (res < -1)
    res = -1;
  if (opt.verbose)
    fprintf(opt.lfile, "\n\n");
  return res;
}
/* Draw the dotted progress report of a download on opt.lfile.

   RES is a number of bytes and INIT tells whether the display must be
   (re)initialized.  The running state (byte offset not yet turned
   into a dot, dots drawn on the current row, rows completed) is kept
   in static local variables between calls.

   With INIT equal to 1 the state is reset and RES is taken as the
   amount that had already been retrieved before this download
   started: whole rows contained in it are skipped with a
   "[ skipping ...K ]" note, and the remaining part is drawn with ','
   instead of '.'.

   One dot is drawn per opt.dot_bytes bytes, a space precedes every
   group of opt.dot_spacing dots, and after opt.dots_in_line dots a
   new row is started with its kilobyte offset.

   Return value: 1 if at least one dot was drawn by this call, 0
   otherwise.  */
int
show_progress(long res, int init)
{
  static long row_bytes;        /* Bytes represented by one full row. */
  static long pending;          /* Bytes not yet turned into a dot. */
  static int dots, rows;        /* Dots on this row; rows completed. */
  int drew_something = 0;

  if (init == 1)
    {
      /* Start from a clean slate. */
      pending = 0L;
      rows = 0;
      dots = 0;
      row_bytes = (long)opt.dots_in_line * opt.dot_bytes;

      /* A non-zero RES means the retrieval does not start at the
         beginning; full rows already retrieved are not redrawn.  */
      if (res && res >= row_bytes)
        {
          rows = res / row_bytes;
          res %= row_bytes;
          fprintf(opt.lfile, "\n [ skipping %dK ]",
                  (int)((rows * row_bytes) / 1024));
        }
      fprintf(opt.lfile, "\n%5ldK ->", rows * row_bytes / 1024);
    }

  pending += res;

  /* Turn every complete opt.dot_bytes worth of data into one dot. */
  while (pending >= opt.dot_bytes)
    {
      if (dots % opt.dot_spacing == 0)
        fputc(' ', opt.lfile);
      drew_something = 1;
      fputc(init ? ',' : '.', opt.lfile);

      if (++dots == opt.dots_in_line)
        dots = 0;
      if (dots == 0)
        {
          /* Row finished: print the header of the next one. */
          ++rows;
          fprintf(opt.lfile, "\n%5ldK ->",
                  rows * row_bytes / 1024);
        }
      pending -= opt.dot_bytes;
    }
  return drew_something;
}
/* Restart the internal timer by recording the current time in
   internal_secs and internal_msecs.  Without gettimeofday() only
   whole seconds are available, so the millisecond part is stored as
   zero.  */
void
reset_timer(void)
{
#ifdef HAVE_GETTIMEOFDAY
  struct timeval now;

  gettimeofday(&now, NULL);
  internal_msecs = now.tv_usec / 1000;
  internal_secs = now.tv_sec;
#else /* not HAVE_GETTIMEOFDAY */
  internal_msecs = 0;
  internal_secs = time(NULL);
#endif /* not HAVE_GETTIMEOFDAY */
}
/* The time elapsed from the last call to reset_timer, in msecs.

   When gettimeofday() is not available the timer only has a
   resolution of one second; the difference is still scaled to
   milliseconds so that the unit of the result is the same in both
   configurations.  */
long
elapsed_time(void)
{
#ifdef HAVE_GETTIMEOFDAY
  struct timeval t;

  gettimeofday(&t, NULL);
  return ((t.tv_sec - internal_secs) * 1000
          + (t.tv_usec / 1000 - internal_msecs));
#else /* not HAVE_GETTIMEOFDAY */
  /* time() counts seconds: convert the difference to msecs. */
  return 1000 * ((long)time(NULL) - internal_secs);
#endif /* not HAVE_GETTIMEOFDAY */
}
/* The function returns pointer to a static char[] buffer in which
   zero-terminated string-representation of the current local time
   (in form hh:mm:ss) is printed.  The buffer is overwritten by every
   call, so the function is not reentrant.

   If tm is non-NULL, the time_t of the current time is also stored
   in *tm (this is done by time() itself).

   If the current time cannot be obtained or cannot be converted to
   local time, an empty string is returned.  */
char *
time_str(time_t *tm)
{
  static char tms[15];
  struct tm *ptm;
  time_t tim;

  *tms = '\0';
  tim = time(tm);
  if (tim == (time_t)-1)
    return tms;
  /* localtime() may fail and return NULL; do not dereference it. */
  ptm = localtime(&tim);
  if (!ptm)
    return tms;
  /* strftime() never writes past the buffer; on failure it returns 0
     and leaves the contents unspecified, so reset to "".  */
  if (!strftime(tms, sizeof(tms), "%H:%M:%S", ptm))
    *tms = '\0';
  return tms;
}
/* Format the download rate for BYTES transferred in MSECS
   milliseconds.  The unit is chosen by magnitude: below 1024 bytes
   per second the rate is shown in B/s, below 1024 * 1024 bytes per
   second in KB/s, and in MB/s otherwise.  A MSECS of zero is treated
   as one millisecond to avoid dividing by zero.

   Returns a pointer to a static buffer that is overwritten by every
   call.  */
char *
rate(long bytes, long msecs)
{
  /* Large enough for any quotient of two longs printed with two
     decimals plus the unit; a 15-byte buffer was already too small
     for "2048000.00 MB/s".  */
  static char res[64];
  double dlrate;

  if (!msecs)
    ++msecs;
  dlrate = (double)1000 * bytes / msecs;
  if (dlrate < 1024.0)
    sprintf(res, "%.2f B/s", dlrate);
  else if (dlrate < 1024.0 * 1024.0)
    sprintf(res, "%.2f KB/s", dlrate / 1024.0);
  else
    sprintf(res, "%.2f MB/s", dlrate / (1024.0 * 1024.0));
  return res;
}
/* Retrieve the given URL.  Decides which loop to call -- HTTP or FTP
   (file:// is not handled yet and trips an assertion).

   ORIGURL is the URL to fetch and REFURL, if non-NULL, is copied into
   the referer field of the parsed URL.  Redirections reported by the
   protocol loop as NEWLOCATION are followed until a final result is
   reached.

   If FILE is non-NULL, *FILE is set to a freshly allocated copy of
   the local file name of the final retrieval, or to NULL.  If NEWLOC
   is non-NULL, *NEWLOC is set to a freshly allocated string with the
   URL that was finally retrieved; on every error return from inside
   the loop it is left NULL.  The caller frees both.  DT receives the
   document type flags and may be NULL if the caller is not
   interested.

   Returns the status of the last operation: the parse error, PROXERR
   for proxy problems, WRONGCODE for a redirection to itself, or the
   result of the protocol loop.  All memory allocated here is released
   on the error paths as well.  */
uerr_t
retrieve_url(const char *origurl, char **file, char **newloc,
             const char *refurl, int *dt)
{
  urlinfo *u;
  uerr_t result;
  char *url;
  int local_use_proxy, location_changed, dummy, oldrec;
  char *mynewloc, *proxy;

  /* If dt is NULL, just ignore it. */
  if (!dt)
    dt = &dummy;
  url = nstrdup(origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;
  location_changed = 0;

  /* This ugly loop is because of Location headers. */
  do
    {
      u = newurl();
      /* Parse the URL.  If the new location was gained from the
         Location header, we need "strict" parsing.  RFC2068 is clear
         about `Location:' containing an absoluteURI. */
      result = parseurl(url, u, location_changed);
      if (result != URLOK)
        {
          freeurl(u, 1);
          if (!opt.quiet)
            fprintf(opt.lfile, "%s: %s.\n", url, uerrmsg(result));
          free(url);            /* Do not leak our copy of the URL. */
          return result;
        }

      /* Set the referer. */
      if (refurl)
        u->referer = nstrdup(refurl);
      else
        u->referer = NULL;

      local_use_proxy = USE_PROXY(u);
      if (local_use_proxy)
        {
          urlinfo *pu;

          pu = newurl();
          /* Copy the original URL to new location. */
          memcpy(pu, u, sizeof(*u));
          pu->proxy = NULL;     /* A minor correction :) */
          /* Initialize u to nil. */
          memset(u, 0, sizeof(*u));
          u->proxy = pu;
          /* Get the appropriate proxy, according to protocol. */
          proxy = getproxy(pu->proto);
          if (!proxy)
            {
              if (!opt.quiet)
                fprintf(opt.lfile, "Could not find proxy host.\n");
              freeurl(u, 1);
              free(url);
              return PROXERR;
            }
          /* Parse the proxy URL. */
          result = parseurl(proxy, u, 0);
          if (result != URLOK || u->proto != URLHTTP)
            {
              if (!opt.quiet)
                {
                  if (u->proto == URLHTTP)
                    fprintf(opt.lfile, "Proxy %s: %s.\n", proxy,
                            uerrmsg(result));
                  else
                    fprintf(opt.lfile, "Proxy %s: Must be HTTP.\n", proxy);
                }
              freeurl(u, 1);
              free(url);
              return PROXERR;
            }
          u->proto = URLHTTP;
        } /* local_use_proxy */

      assert(u->proto != URLFILE); /* For now... */
      mynewloc = NULL;

      if (u->proto == URLHTTP)
        result = http_loop(u, &mynewloc, dt);
      else if (u->proto == URLFTP)
        {
          /* If the location has changed, we must not allow recursive
             FTP retrieval, so we save recursion to oldrec, and restore
             it later. */
          oldrec = opt.recursive;
          if (location_changed)
            opt.recursive = 0;
          result = ftp_loop(u, dt);
          if (location_changed)
            opt.recursive = oldrec;
          /* There is a possibility of having HTTP being redirected to
             FTP.  In these cases we must decide whether the text is
             HTML according to the suffix.  The HTML suffixes are
             `.html' and (yuck!) `.htm', case-insensitive. */
          if (location_changed && u->local && (u->proto == URLFTP))
            {
              char *suf = suffix(u->local);

              if (suf && (!strcasecmp(suf, "html") || !strcasecmp(suf, "htm")))
                *dt |= TEXTHTML;
            }
        }

      location_changed = (result == NEWLOCATION);
      if (location_changed)
        {
          /* Check for redirection to oneself. */
          if (url_equal(url, mynewloc))
            {
              if (!opt.quiet)
                fprintf(opt.lfile, "%s: Redirection to itself.\n", mynewloc);
              /* Release everything this iteration still owns. */
              if (mynewloc)
                free(mynewloc);
              free(url);
              freeurl(u, 1);
              return WRONGCODE;
            }
        }
      if (mynewloc)
        {
          /* From now on the new location is the URL we work with. */
          free(url);
          url = mynewloc;
        }
      if (!location_changed && file)
        {
          if (u->local)
            *file = nstrdup(u->local);
          else
            *file = NULL;
        }
      freeurl(u, 1);
    }
  while (location_changed);

  /* Hand the final URL to the caller, or drop it. */
  if (newloc)
    *newloc = url;
  else
    free(url);
  return result;
}
/* Collect the URLs listed in FILE and call retrieve_url for each of
   them in turn.  If HTML is non-zero the file is parsed as HTML to
   find the URLs, otherwise it is read as a plain list.

   When opt.recursive is set, recursive_retrieve is called after every
   successfully retrieved document that was flagged as HTML.

   Processing stops early with QUOTEXC once opt.quota is set and has
   been exceeded.  *COUNT is set to the number of URLs that were
   processed.  The return value is the status of the last operation,
   or RETROK if there was nothing to do.  */
uerr_t
retrieve_from_file(const char *file, int html, int *count)
{
  char *local_name, *final_url;
  int is_first = 1;
  int dt;
  uerr_t status = RETROK;       /* Suppose everything is OK. */
  urlpos *url_list, *entry;

  /* If spider-mode is on, we do not want get_urls_html barfing
     errors on baseless links. */
  if (html)
    url_list = get_urls_html(file, NULL, opt.spider);
  else
    url_list = get_urls_file(file);

  *count = 0;                   /* Reset the URL count. */
  for (entry = url_list; entry; entry = entry->next, ++*count)
    {
      if (opt.quota && opt.downloaded > opt.quota)
        {
          status = QUOTEXC;
          break;
        }

      status = retrieve_url(entry->url, &local_name, &final_url, NULL, &dt);
      if (opt.recursive && status == RETROK && (dt & TEXTHTML))
        status = recursive_retrieve(local_name,
                                    final_url ? final_url : entry->url,
                                    is_first ? RFIRST_TIME : 0);

      /* Both strings were allocated by retrieve_url for us. */
      if (final_url)
        free(final_url);
      if (local_name)
        free(local_name);
      is_first = 0;
    }

  /* Free the linked list of URL-s. */
  free_urlpos(url_list);
  return status;
}
/* Tell the user what happens next after a failed attempt.  Nothing is
   printed unless opt.verbose is set.  When the two numbers are equal
   the message is "Giving up.", otherwise it is "Retrying."; the
   numbers are the attempt number and the attempt limit (the order
   does not matter, since they are only compared for equality).  */
void
printwhat(int n1, int n2)
{
  if (!opt.verbose)
    return;

  if (n1 != n2)
    fprintf(opt.lfile, "Retrying.\n\n");
  else
    fprintf(opt.lfile, "Giving up.\n\n");
}